Question 1

By using loyalty and credit card data, we can observe that the top three most visited places during the 2 weeks data are:

  1. Katerina’s Cafe
  2. Hippokampos
  3. Guy’s Gyros

As we can see in the figure below, the number of daily frequency from credit card data and loyalty data are different. Based on credit card data Katerina’s Cafe were most popular in 2014-06-01…

cc_freq <- group_by(cc_data, location) %>% summarise(freq = n())%>% arrange(desc(freq))
cc_freq
## # A tibble: 34 x 2
##    location              freq
##    <chr>                <int>
##  1 Katerina's Cafe        212
##  2 Hippokampos            171
##  3 Guy's Gyros            158
##  4 Brew've Been Served    156
##  5 Hallowed Grounds        92
##  6 Ouzeri Elian            87
##  7 Abila Zacharo           72
##  8 Gelatogalore            64
##  9 Kalami Kafenion         64
## 10 Bean There Done That    47
## # ... with 24 more rows
loyalty_freq <- group_by(loyalty_data, location) %>% summarise(freq = n())%>% arrange(desc(freq))
loyalty_freq
## # A tibble: 33 x 2
##    location              freq
##    <chr>                <int>
##  1 Katerina's Cafe        195
##  2 Hippokampos            155
##  3 Guy's Gyros            146
##  4 Brew've Been Served    140
##  5 Ouzeri Elian            84
##  6 Hallowed Grounds        80
##  7 Abila Zacharo           71
##  8 Kalami Kafenion         66
##  9 Gelatogalore            60
## 10 Bean There Done That    45
## # ... with 23 more rows
cc_freq_daily <- group_by(cc_data, timestamp_date, location) %>% summarise(freq = n())%>% arrange(desc(freq))
top3_place = c("Katerina's Cafe", "Hippokampos","Guy's Gyros")

cc_freq_daily_selected <- cc_freq_daily %>% filter(location %in% top3_place)

top3_vis_cc <- ggplot(cc_freq_daily_selected, aes(fill=as.character(timestamp_date), y=freq, x=location)) + 
    geom_bar(position="stack", stat="identity") +
    geom_text(label = cc_freq_daily_selected$freq, position=position_stack(vjust=0.5))+
    ggtitle("CC data top 3 location")

ggplotly(top3_vis_cc)
loyalty_freq_daily <- group_by(loyalty_data, timestamp, location) %>% summarise(freq = n())%>% arrange(desc(freq))
top3_place = c("Katerina's Cafe", "Hippokampos","Guy's Gyros")

loyalty_freq_daily_selected <- loyalty_freq_daily %>% filter(location %in% top3_place)

top3_vis_L <- ggplot(loyalty_freq_daily_selected, aes(fill=as.character(timestamp), y=freq, x=location)) + 
    geom_bar(position="stack", stat="identity") +
    geom_text(label = loyalty_freq_daily_selected$freq, position=position_stack(vjust=0.5))+
    ggtitle("Loyalty data top 3 location")

ggplotly(top3_vis_L)

##Question 2

gps_data <- read_csv("MC2/gps.csv")

gps_data$Timestamp_date <- date_time_parse(gps_data$Timestamp,
                                zone = "",
                                format = "%m/%d/%Y")

gps_data$Timestamp <- date_time_parse(gps_data$Timestamp,
                                zone = "",
                                format = "%m/%d/%Y %H:%M")
gps_data$Time <- format(gps_data$Timestamp, format = "%H:%M")
gps_data$Hour <- get_hour(gps_data$Timestamp)


gps_sort <- gps_data[order(gps_data$Timestamp, gps_data$id),]

stop_fin_2 = data.frame()

for (id_1 in unique(gps_sort$id))
{
  make_stop = data.frame()
  gps_id <- gps_sort %>%
  filter(id==id_1)
  
  for(i in 1:nrow(gps_id))
  {
    if(i < nrow(gps_id))
    {
      start <- gps_id$Timestamp[i]
      end <- gps_id$Timestamp[i+1]
    } else
    {
      start <- gps_id$Timestamp[i]
      end <- gps_id$Timestamp[i]
      print(start)
    }
    elapsed.time <- start%--%end
    time_duration <- as.duration(elapsed.time)
    
    if(time_duration > 300)
    {
      make_stop = rbind(make_stop,cbind(gps_id[i,],time_duration,i),cbind(gps_id[i+1,],time_duration,i))
    }
    
  }
  
  stop_fin_2 = rbind(stop_fin_2, make_stop)
}
## [1] "2014-01-19 20:07:00 +08"
## [1] "2014-01-19 16:06:00 +08"
## [1] "2014-01-19 19:48:00 +08"
## [1] "2014-01-19 19:47:00 +08"
## [1] "2014-01-19 20:11:00 +08"
## [1] "2014-01-19 19:47:00 +08"
## [1] "2014-01-19 20:24:00 +08"
## [1] "2014-01-19 15:57:00 +08"
## [1] "2014-01-19 19:53:00 +08"
## [1] "2014-01-19 20:21:00 +08"
## [1] "2014-01-19 20:38:00 +08"
## [1] "2014-01-19 19:58:00 +08"
## [1] "2014-01-19 20:02:00 +08"
## [1] "2014-01-18 15:52:00 +08"
## [1] "2014-01-19 19:49:00 +08"
## [1] "2014-01-19 14:11:00 +08"
## [1] "2014-01-19 19:56:00 +08"
## [1] "2014-01-19 18:25:00 +08"
## [1] "2014-01-19 19:49:00 +08"
## [1] "2014-01-19 20:09:00 +08"
## [1] "2014-01-19 20:20:00 +08"
## [1] "2014-01-18 20:21:00 +08"
## [1] "2014-01-19 20:27:00 +08"
## [1] "2014-01-19 14:32:00 +08"
## [1] "2014-01-19 20:03:00 +08"
## [1] "2014-01-16 17:50:00 +08"
## [1] "2014-01-19 20:38:00 +08"
## [1] "2014-01-19 19:46:00 +08"
## [1] "2014-01-19 20:04:00 +08"
## [1] "2014-01-19 20:56:00 +08"
## [1] "2014-01-19 20:36:00 +08"
## [1] "2014-01-19 20:11:00 +08"
## [1] "2014-01-19 19:54:00 +08"
## [1] "2014-01-19 19:46:00 +08"
## [1] "2014-01-19 20:25:00 +08"
## [1] "2014-01-17 17:21:00 +08"
## [1] "2014-01-16 21:06:00 +08"
## [1] "2014-01-16 19:07:00 +08"
## [1] "2014-01-16 20:00:00 +08"
## [1] "2014-01-19 19:50:00 +08"
elapsed.time
## [1] 2014-01-19 19:50:00 +08--2014-01-19 19:50:00 +08
location_point = read_csv("MC2/stop_fin.csv") %>% select(lat,long,Possible_Location)%>% unique()
stop_fin_2$lat = round(stop_fin_2$lat,5)
stop_fin_2$long = round(stop_fin_2$long,5)

stop_final = merge(stop_fin_2,location_point, by.x = c("lat","long"), by.y = c("lat","long"), all.x = TRUE)

stop_fin <- stop_final


stop_fin_location <- stop_fin[!(stop_fin$Possible_Location %like% 'mice'),]

gps_sf_stopfin <- st_as_sf(stop_fin_location,
                   coords = c("long","lat"),
                   crs=4326)
gps_sf_stopfin_unique1 <- gps_sf_stopfin  %>% select(Possible_Location,geometry)%>%unique()%>%arrange(Possible_Location)

gps_sf_stopfin_unique <- gps_sf_stopfin_unique1[-c(221,476,644,1100,1103),]

gps_path_stopfin <- gps_sf_stopfin_unique %>%
  group_by(Possible_Location) %>%
  summarize(m_Timestamp = mean(geometry),
            do_union = FALSE ) %>%
  st_cast("LINESTRING")


Abila_st <- st_read(dsn = "MC2/Geospatial",
                    layer = "Abila")
## Reading layer `Abila' from data source 
##   `C:\jovinkahartanto\assignment_distill\MC2\Geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 3290 features and 9 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: 24.82401 ymin: 36.04502 xmax: 24.90997 ymax: 36.09492
## Geodetic CRS:  WGS 84
P <- npts(Abila_st, by_feature = TRUE)
abila_st_2 <- cbind(Abila_st,P) %>% filter(P > 1)
bgmap <- raster("MC2/Geospatial/MC2-tourist.TIF")

tmap_mode("view")
tm_shape(bgmap) +
  tm_rgb(bgmap, r = 1, g = 2, b = 3,
         alpha = NA,
         saturation = 1,
         interpolate = TRUE,
         max.value = 255) +
  tm_shape(abila_st_2) +
  tm_lines(col = "red", scale = 1)+
  tm_shape(gps_path_stopfin[9,]) +
  tm_lines(col = "blue", scale =4, interactive = TRUE) +
  tm_text("Possible_Location", size = 3, remove.overlap = TRUE, overwrite.lines = TRUE, just = "top")
gps_data <- read_csv("MC2/gps.csv")

gps_data$Timestamp_date <- date_time_parse(gps_data$Timestamp,
                                zone = "",
                                format = "%m/%d/%Y")

gps_data$Timestamp <- date_time_parse(gps_data$Timestamp,
                                zone = "",
                                format = "%m/%d/%Y %H:%M")
gps_data$Time <- format(gps_data$Timestamp, format = "%H:%M")
gps_data$Hour <- get_hour(gps_data$Timestamp)


gps_sf <- st_as_sf(gps_data,
                   coords = c("long","lat"),
                   crs=4326)

gps_path <- gps_sf %>%
  group_by(id, Timestamp_date,Hour) %>%
  summarize(m_Timestamp = mean(Timestamp_date),
            do_union = FALSE ) %>%
  st_cast("LINESTRING")


Abila_st <- st_read(dsn = "MC2/Geospatial",
                    layer = "Abila")
## Reading layer `Abila' from data source 
##   `C:\jovinkahartanto\assignment_distill\MC2\Geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 3290 features and 9 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: 24.82401 ymin: 36.04502 xmax: 24.90997 ymax: 36.09492
## Geodetic CRS:  WGS 84
P <- npts(Abila_st, by_feature = TRUE)
abila_st_2 <- cbind(Abila_st,P) %>% filter(P > 1)
gps_path_selected <- gps_path %>%
  filter(id==28)


bgmap <- raster("MC2/Geospatial/MC2-tourist.TIF")

tmap_mode("view")
tm_shape(bgmap) +
  tm_rgb(bgmap, r = 1, g = 2, b = 3,
         alpha = NA,
         saturation = 1,
         interpolate = TRUE,
         max.value = 255) +
  tm_shape(abila_st_2) +
  tm_lines(col = "red", scale = 1)+
  tm_shape(gps_path_selected[1:4,]) +
  tm_lines(col = "black", scale =4, interactive = TRUE) +
  tm_text("Hour", size = 3, remove.overlap = TRUE, overwrite.lines = TRUE, just = "top")